library(tidyverse)
tidyverse_logo()
* __ _ __ . o * .
/ /_(_)__/ /_ ___ _____ _______ ___
/ __/ / _ / // / |/ / -_) __(_-</ -_)
\__/_/\_,_/\_, /|___/\__/_/ /___/\__/
* . /___/ o . *
tidyverse::tidyverse_logo()
* __ _ __ . o * .
/ /_(_)__/ /_ ___ _____ _______ ___
/ __/ / _ / // / |/ / -_) __(_-</ -_)
\__/_/\_,_/\_, /|___/\__/_/ /___/\__/
* . /___/ o . *
# Install the CodeClanData package from the codeclan/CodeClanData GitHub repository.
# The recorded output below shows the install being skipped when the remote is unchanged.
devtools::install_github("codeclan/CodeClanData")
WARNING: Rtools is required to build R packages, but is not currently installed.
Please download and install Rtools 4.0 from https://cran.r-project.org/bin/windows/Rtools/.
Skipping install of 'CodeClanData' from a github remote, the SHA1 (d46cb3c5) has not changed since last install.
Use `force = TRUE` to force installation
# Attach CodeClanData, then print the `students` and `all_deaths` objects
# (presumably datasets shipped with the package - confirm)
library(CodeClanData)
students
all_deaths
# Checking our data
# number of rows
nrow(all_deaths)
[1] 917
# number of columns
ncol(all_deaths)
[1] 13
# overall dimensions: number of rows, then number of columns
dim(all_deaths)
[1] 917 13
# variable (column) names
names(all_deaths)
[1] "name" "allegiances" "year_of_death" "book_of_death"
[5] "death_chapter" "book_intro_chapter" "gender" "nobility"
[9] "book1_GoT" "book2_CoK" "book3_SoS" "book4_FfC"
[13] "book5_DwD"
# check the first 10 rows
head(all_deaths, 10)
# check the last 10 rows
tail(all_deaths, 10)
# get an overview of the data: one line per column with its type and first values
glimpse(all_deaths)
Rows: 917
Columns: 13
$ name <chr> "Addam Marbrand", "Aegon Frey (Jinglebell)", "Aegon Targaryen", "Adrack Hum~
$ allegiances <chr> "Lannister", "None", "House Targaryen", "House Greyjoy", "Lannister", "Bara~
$ year_of_death <dbl> NA, 299, NA, 300, NA, NA, 300, 300, NA, NA, 299, NA, 300, NA, NA, NA, 299, ~
$ book_of_death <dbl> NA, 3, NA, 5, NA, NA, 4, 5, NA, NA, 2, NA, 5, NA, NA, NA, 2, NA, NA, 4, NA,~
$ death_chapter <dbl> NA, 51, NA, 20, NA, NA, 35, NA, NA, NA, 56, NA, 4, NA, NA, NA, 46, NA, NA, ~
$ book_intro_chapter <dbl> 56, 49, 5, 20, NA, NA, 21, 59, 11, 0, 50, 54, 18, 15, 38, 26, 4, 6, 65, 36,~
$ gender <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, ~
$ nobility <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, ~
$ book1_GoT <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, ~
$ book2_CoK <dbl> 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, ~
$ book3_SoS <dbl> 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, ~
$ book4_FfC <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ~
$ book5_DwD <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ~
# view data (NOTE(review): presumably opens the interactive data viewer - confirm)
view(all_deaths)
# view a short representation of the data's structure
str(all_deaths)
spec_tbl_df [917 x 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ name : chr [1:917] "Addam Marbrand" "Aegon Frey (Jinglebell)" "Aegon Targaryen" "Adrack Humble" ...
$ allegiances : chr [1:917] "Lannister" "None" "House Targaryen" "House Greyjoy" ...
$ year_of_death : num [1:917] NA 299 NA 300 NA NA 300 300 NA NA ...
$ book_of_death : num [1:917] NA 3 NA 5 NA NA 4 5 NA NA ...
$ death_chapter : num [1:917] NA 51 NA 20 NA NA 35 NA NA NA ...
$ book_intro_chapter: num [1:917] 56 49 5 20 NA NA 21 59 11 0 ...
$ gender : num [1:917] 1 1 1 1 1 1 1 0 1 1 ...
$ nobility : num [1:917] 1 1 1 1 1 1 1 1 1 0 ...
$ book1_GoT : num [1:917] 1 0 0 0 0 0 1 1 0 0 ...
$ book2_CoK : num [1:917] 1 0 0 0 0 1 0 1 1 0 ...
$ book3_SoS : num [1:917] 1 1 0 0 1 1 1 1 0 1 ...
$ book4_FfC : num [1:917] 1 0 0 0 0 0 1 0 1 0 ...
$ book5_DwD : num [1:917] 0 0 1 1 0 0 0 1 0 0 ...
- attr(*, "spec")=
.. cols(
.. name = col_character(),
.. allegiances = col_character(),
.. year_of_death = col_double(),
.. book_of_death = col_double(),
.. death_chapter = col_double(),
.. book_intro_chapter = col_double(),
.. gender = col_double(),
.. nobility = col_double(),
.. book1_GoT = col_double(),
.. book2_CoK = col_double(),
.. book3_SoS = col_double(),
.. book4_FfC = col_double(),
.. book5_DwD = col_double()
.. )
# Show the full dataset in the console
all_deaths
# Keep only a handful of columns, listed by name
select(all_deaths, c(name, allegiances, gender, nobility, year_of_death))
# Drop a single column by taking the complement of it
select(all_deaths, !name)
# Store a selection in a variable so it can be reused
death_no_names <- select(all_deaths, !name)
# Columns can be picked purely by position ...
books <- select(all_deaths, c(4, 6, 9:13))
books
# ... or by combining positions with a range of column names
books_anotherway <- select(all_deaths, c(4, 6), book1_GoT:book5_DwD)
books_anotherway
# Every column whose name contains "book"
god <- select(all_deaths, contains(match = "book"))
god
# Columns 3 to 5, written as a range of names
deaths <- select(all_deaths, year_of_death:death_chapter)
deaths
# Rows whose allegiance is exactly "Lannister"
filter(all_deaths, allegiances == "Lannister")
# found 102 rows
# Either spelling of the house, combined with OR
filter(all_deaths, allegiances == "Lannister" |
         allegiances == "House Lannister")
# Everything except the exact value "Lannister"
filter(all_deaths, allegiances != "Lannister")
# %in% is a shorter way to test a column against several values
filter(all_deaths, allegiances %in% c("House Lannister",
                                      "Lannister"))
# Comparison operators work on numeric columns
filter(all_deaths, year_of_death >= 299)
filter(all_deaths, allegiances == "None")
# Both conditions must hold: intro chapter between 5 and 10 inclusive
filter(all_deaths, book_intro_chapter >= 5 &
         book_intro_chapter <= 10)
# Task 1 Find where the year_of_death is less than or equal to 299.
filter(all_deaths, year_of_death <= 299)
# Task 2 Find the females (gender is 0) who are not Lannisters
# Both spellings of the house must be excluded: testing only != "Lannister"
# would still keep the "House Lannister" rows.
filter(all_deaths, gender == 0 &
         !allegiances %in% c("Lannister", "House Lannister"))
# Task 3 Find just the data for the characters "Jon Snow", "Daenerys Targaryen" and "Samwell Tarly".
# Written first with OR ...
filter(all_deaths, name == "Jon Snow" |
         name == "Daenerys Targaryen" |
         name == "Samwell Tarly")
# ... and then more compactly with %in%
filter(all_deaths, name %in% c("Jon Snow",
                               "Daenerys Targaryen",
                               "Samwell Tarly"))
# `x %>% f(y)` is the same call as `f(x, y)`, so each line below sorts all_deaths
# Sort by gender in ascending order
all_deaths %>% arrange(gender)
# Sort by gender in descending order
all_deaths %>% arrange(desc(gender))
# Sort by book of death, breaking ties by chapter of death
all_deaths %>% arrange(book_of_death, death_chapter)
# Arrange all_deaths by allegiances. What happens when you arrange by a character column?
all_deaths %>% arrange(allegiances)
# Arrange all_deaths by allegiances and book_intro_chapter
all_deaths %>% arrange(allegiances, book_intro_chapter)
# Arrange all_deaths by descending year_of_death
all_deaths %>% arrange(desc(year_of_death))
# mutate() adds a new column or overwrites an existing one
# (`x %>% f(y)` is the same call as `f(x, y)`)
# New column: year_of_death minus 298
all_deaths %>% mutate(years_survived = year_of_death - 298)
# Overwrite a column with a scaled copy of itself
all_deaths %>% mutate(book_of_death = book_of_death * 5)
# Replace year_of_death with a "is it missing?" flag;
# the column type changes from dbl to lgl
all_deaths %>% mutate(year_of_death = is.na(year_of_death))
# Convert a numeric column to character
all_deaths %>% mutate(book_of_death = as.character(book_of_death))
# Converting names to numbers: values that are not numbers become NA, with a warning
all_deaths %>% mutate(name = as.numeric(name))
Warning: Problem with `mutate()` column `name`.
i `name = as.numeric(name)`.
i NAs introduced by coercion
# A summary function inside mutate() puts its single result on every row
all_deaths %>% mutate(year_of_death = sum(year_of_death, na.rm = TRUE))
all_deaths %>% mutate(year_of_death = mean(year_of_death, na.rm = TRUE))
# summarise() collapses the table instead; gender is coded 0/1, so the sum counts the 1s
all_deaths %>% summarise(n_males = sum(gender))
# Group the rows by allegiance
death_grouped <- all_deaths %>% group_by(allegiances)
death_grouped
# the printed header now shows Groups: allegiances [21]
death_grouped %>% summarise(character_count = n())
# Regroup by two variables to count each nobility/gender combination
death_grouped <- all_deaths %>% group_by(nobility, gender)
death_grouped %>% summarise(character_count = n())
`summarise()` has grouped output by 'nobility'. You can override using the `.groups` argument.
# find the people that have died (rows with a non-missing book_of_death),
# then count them per allegiance and sort with the largest count first,
# storing every intermediate step in its own variable
have_died <- filter(all_deaths, !is.na(book_of_death))
died_grouped <- group_by(have_died, allegiances)
died_counts <- summarise(died_grouped, count = n())
arrange(died_counts, desc(count))
# the pipe takes the result of the line before the %>% and passes it as the
# first argument of the next call
all_deaths %>%
filter(!is.na(book_of_death)) %>%
group_by(allegiances) %>%
summarise(count = n()) %>%
arrange(desc(count))
# with the pipe there is no need to create multiple intermediate variables;
# only the final result is stored
death_by_allegiance <- all_deaths %>%
filter(!is.na(book_of_death)) %>%
group_by(allegiances) %>%
summarise(count = n()) %>%
arrange(desc(count))
# average year of death, ignoring missing values; the result of summarise()
# is stored as-is (a one-row table with a single av_value column)
av_year_of_death <- all_deaths %>%
summarise(av_value = mean(year_of_death,
na.rm = TRUE))
av_year_of_death
# NOTE(review): av_year_of_death is still the summarise() result here rather
# than a plain number, so this comparison presumably does not behave like the
# literal-number version below - confirm
all_deaths %>%
mutate(death_later_than_av = year_of_death >
av_year_of_death)
# the same comparison against the average typed in by hand
all_deaths %>%
mutate(death_later_than_av = year_of_death >
299.1574)
# check what kind of object the stored average is
# NOTE(review): the "numeric" recorded below would be expected only once the
# pull() version of av_year_of_death has been run - confirm
class(av_year_of_death)
[1] "numeric"
# for comparison: the class of the number typed in by hand
class(299.1574)
[1] "numeric"
# Average year of death as a bare number: compute the one-row summary, then
# extract its av_value column so the result can be used directly in comparisons
av_year_of_death <- all_deaths %>%
  summarise(av_value = mean(year_of_death, na.rm = TRUE)) %>%
  pull(av_value)
av_year_of_death
[1] 299.1574
# Flag each character whose year of death is later than the average
mutate(
  all_deaths,
  death_later_than_av = year_of_death > av_year_of_death
)